<?php

// Extracts a Scala Map for all namespaces in all languages from the MediaWiki Message*.php files.
// Specify /trunk/phase3 directory of MediaWiki as argument.

// Path separator used to build every file system path below. It is taken from
// the platform PHP runs on ("\\" on Windows, "/" on Unix), so the script does
// not have to be edited before it is run on a different operating system.
$SEPARATOR = DIRECTORY_SEPARATOR;

// Output path of the generated Scala file (a relative name, so it is resolved
// against the current working directory when the file is opened).
$namespacesOutputFile = "Namespaces.scala";

// Wiki language codes whose namespaces are written to the generated Scala Map.
// Only a selection is listed: with every language included, the Scala Map is
// too big to run. The result maps each code to 1; the main loop only looks up
// whether a code is present with a truthy value.
$dbpediaLanguage = array_fill_keys(
    array(
        "en", "de", "fr", "it", "pl", "ja", "es", "nl", "pt", "ru",
        "sv", "zh", "ca", "no", "fi", "uk", "hu", "cs", "ro", "tr",
        "ko", "vi", "da", "ar", "eo", "sr", "id", "lt", "vo", "sk",
        "he", "bg", "fa", "sl", "war", "hr", "et", "ms", "new", "simple",
        "gl", "th", "roa-rup", "nn", "eu", "hi", "el", "ht", "la", "te",
        "ka", "ceb", "mk", "az", "tl", "br", "sh", "mr", "lb", "jv",
        "lv", "bs", "is", "cy", "be-x-old", "pms", "sq", "ta", "bpy", "be",
        "an", "oc", "bn", "sw", "io", "ksh", "lmo", "fy", "gu", "nds",
        "af", "scn", "qu", "ku", "ur", "su", "ml", "zh-yue", "ast", "nap",
        "bat-smg", "wa", "ga", "cv", "hy", "yo"
    ),
    1
);

// Announce what the script is about to do.
echo "Script to generate MediaWiki namespace identifiers lexicon for different languages." . PHP_EOL;

// The MediaWiki directory has to be passed as the first command line argument.
if ($argc < 2)
{
    die("path to MediaWiki's trunk/phase3 directory not specified");
}

// The output file is rewritten from scratch, so ask for confirmation first.
// Anything other than "y" (case-insensitive, surrounding whitespace ignored) aborts.
echo "WARNING: any manual changes you made to Namespaces.scala will be lost! continue (y/n)? ";
$confirmation = strtolower(trim(fgets(STDIN)));
if ($confirmation != "y")
{
    die("aborted");
}

// Drop one trailing separator from the given path, if there is one, so that
// the paths built below do not contain a doubled separator.
$mediawikiPath = $argv[1];
$separatorLength = strlen($SEPARATOR);
if (substr($mediawikiPath, -$separatorLength) === $SEPARATOR)
{
    $mediawikiPath = substr($mediawikiPath, 0, -$separatorLength);
}

// <MediaWiki>/languages/messages holds the Messages*.php files.
$messagesPath = implode($SEPARATOR, array($mediawikiPath, "languages", "messages"));

// Load MediaWiki's Defines.php before any message file is loaded.
// NOTE(review): presumably needed for the namespace constants the message
// files use as array keys - confirm against the MediaWiki sources.
require implode($SEPARATOR, array($mediawikiPath, "includes", "Defines.php"));


// Collect the message files together with the language part of their names.
echo "Reading namespaces from " . $messagesPath . " ..." . PHP_EOL;
$filesArray = getLanguageFiles($messagesPath);


// Start the generated Scala source: package, imports, the doc comment, the
// opening of the Namespaces object and the opening of the language Map.
// Every line is terminated with the platform line ending (PHP_EOL).
$scalaHeaderLines = array(
    'package org.dbpedia.extraction.wikiparser.impl.wikipedia',
    '',
    'import org.dbpedia.extraction.util.Language',
    'import org.dbpedia.extraction.wikiparser.WikiTitle',
    '',
    '/**',
    ' * Holds the namespace names of all Wikipedia languages.',
    ' * Generated by GenerateNamespaceMap.php.',
    ' */',
    '',
    'object Namespaces',
    '{',
    '',
    // the entries of this Map are appended by the code that follows
    '    private val languageNamespaceMap = Map(',
);
$resultString = implode(PHP_EOL, $scalaHeaderLines) . PHP_EOL;

// Build one Scala entry of the form
//         "<language>" -> Map(
//             "<namespace name>" -> <namespace id>,
//             ...
//         )
// per selected language. The entries are collected first and joined afterwards,
// so the separating commas never depend on which languages or namespace names
// happen to be skipped.
$languageEntries = array();
foreach ($filesArray as $fileAndLanguage)
{
    // make full path to language file
    $file = $fileAndLanguage[0];
    $fileWithPath = $messagesPath . $SEPARATOR . $file;

    // lower-case language; in other places, wikipedia uses '-' instead of '_'
    $language = str_replace('_', '-', strtolower($fileAndLanguage[1]));

    // Only selected languages are exported. empty() also covers languages that
    // are missing from the list, without reading an undefined array index.
    // Checking before the file is loaded avoids loading files that are not used.
    if (empty($dbpediaLanguage[$language]))
    {
        continue;
    }

    // The message file is loaded into the current scope. Reset both arrays
    // first, so that a file which does not define one of them cannot inherit
    // the values of a previously loaded language.
    $namespaceNames = array();
    $namespaceAliases = array();

    // load $namespaceNames and $namespaceAliases associative arrays
    require $fileWithPath;

    // nothing to export for languages without namespace names
    if (empty($namespaceNames))
    {
        continue;
    }

    // every language starts with the Wikipedia namespace
    $namespaceEntries = array('            "Wikipedia" -> 4');

    foreach ($namespaceNames as $id => $ns)
    {
        // skip entries for empty values
        if ($ns != "")
        {
            // for "$1_discussion" strings, replace $1 with Wikipedia
            $ns = str_replace('$1', 'Wikipedia', $ns);

            $namespaceEntries[] = '            "' . $ns . '" -> ' . $id;
        }
    }

    // aliases map an alternative name to the id of an existing namespace
    foreach ($namespaceAliases as $altNamespace => $id)
    {
        $namespaceEntries[] = '            "' . $altNamespace . '" -> ' . $id;
    }

    $languageEntries[] = '        "' . $language . '" -> Map(' . PHP_EOL
        . implode(',' . PHP_EOL, $namespaceEntries) . PHP_EOL
        . '        )';
}

// Append the language entries; with no entries nothing is appended, so no
// unmatched closing bracket ends up in the output.
if (!empty($languageEntries))
{
    $resultString .= implode(',' . PHP_EOL, $languageEntries) . PHP_EOL;
}

// close languageNamespaceMap
$resultString .= '    )' . PHP_EOL . PHP_EOL;
// Remainder of the generated Namespaces object: the reversed (id -> name) map,
// the lookup methods and the closing brace of the object. The lines are joined
// with the platform line ending; nothing is appended after the final '}'.
$scalaFooterLines = array(
    '    private val reverseMap = languageNamespaceMap.map{',
    '        case (lang,languageMap) => (lang, languageMap.map{case (name, code) => (code, name)}.toMap)',
    '    }.toMap',
    '',
    '    def getAllWikiCodes() : Set[String] =',
    '    {',
    '        languageNamespaceMap.keySet.toSet',
    '    }',
    '',
    '    def getNameForNamespace(language : Language, ns : WikiTitle.Namespace) : String =',
    '    {',
    '        reverseMap.get(language.wikiCode) match',
    '        {',
    '            case Some(m) => m.get(ns.id) match',
    '            {',
    '                case Some(namespace) => namespace',
    '                case None => throw new IllegalArgumentException("namespace number "+ns.id+" not found for language "+language.wikiCode)',
    '            }',
    '            case None => throw new IllegalArgumentException("no namespace identifiers found for language "+language.wikiCode)',
    '        }',
    '    }',
    '',
    '    def apply(language : Language, namespace : String) : Option[Int] =',
    '    {',
    '        languageNamespaceMap.get(language.wikiCode) match',
    '        {',
    '            case Some(namespaceMap) => namespaceMap.get(namespace)',
    '            case None => None',
    '        }',
    '    }',
    '',
    '}',
);
$resultString .= implode(PHP_EOL, $scalaFooterLines);


// write the complete Scala source to the output file
saveStringToFile($resultString, $namespacesOutputFile);


/**
 * Writes a string to a file, replacing any previous content of that file.
 *
 * Progress and error messages are echoed. The script is terminated if the
 * file cannot be opened; a failed or incomplete write and a failed close are
 * reported but do not terminate the script.
 *
 * @param string $s     content to write
 * @param string $fname path of the file to (re)create
 */
function saveStringToFile($s, $fname)
{
    // "wb": create or truncate, binary mode so line endings are written as given
    $fileHandle = fopen($fname, "wb");
    if (!$fileHandle)
    {
        die ("Could not open " . $fname . " for writing");
    }

    echo "Writing namespaces Scala Map to " .  $fname . " ...";

    // fwrite() reports failure as false (or as a short byte count), so the
    // result has to be checked to notice a truncated output file
    $bytesWritten = fwrite($fileHandle, $s);
    if ($bytesWritten === false || $bytesWritten < strlen($s))
    {
        echo "error writing " . $fname;
    }

    if(!fclose($fileHandle))
    {
        echo "error closing " . $fname;
    }
}

/**
 * Lists the message files located directly inside a directory.
 *
 * A message file is a regular entry whose whole name has the form
 * Messages<Language>.php. The script is terminated if the directory cannot
 * be opened.
 *
 * @param string $path directory to scan
 * @return array list of two-element arrays array(file_name, language),
 *               sorted by file name so that repeated runs give the same order
 */
function getLanguageFiles($path)
{
    $dir = opendir($path);
    if (!$dir)
    {
        die("Please set path for <MediaWiki>/languages/messages directory first!");
    }

    $filesWithLanguages = array();
    while (false !== ($file = readdir($dir)))
    {
        // The pattern is anchored at both ends: names such as
        // "MessagesEn.php.bak" must not be reported as "MessagesEn.php".
        if (!preg_match('~^Messages(.*)\.php\z~', $file, $matches))
        {
            continue;
        }

        // readdir() returns bare names, so the directory has to be prepended;
        // otherwise is_dir() would look in the current working directory.
        if (is_dir($path . DIRECTORY_SEPARATOR . $file))
        {
            continue;
        }

        // $matches is array(whole file name, language part)
        $filesWithLanguages[$file] = $matches;
    }
    closedir($dir);

    // readdir() gives no ordering guarantee; sort for reproducible output
    ksort($filesWithLanguages, SORT_STRING);
    return array_values($filesWithLanguages);
}
